############################################
# GHS 
# Date: MAY 19 2014
###########################################


##############################################
#########Estimation Functions#################
##############################################

# FUNCTION FOR CLUSTERED SEs
cl   <- function(dat,fm, cluster){
  require(sandwich, quietly = TRUE)
  require(lmtest, quietly = TRUE)
  M <- length(unique(cluster))
  N <- length(cluster)
  K <- fm$rank
  dfc <- (M/(M-1))*((N-1)/(N-K))
  uj  <- apply(estfun(fm),2, function(x) tapply(x, cluster, sum));
  vcovCL <- dfc*sandwich(fm, meat=crossprod(uj)/N)
  coeftest(fm, vcovCL) }

# Function estimates linear b; BLOCKS and controls allowed 
# p value can be 1 (one sided neg), 2 (2 sided) or 3 (one sided pos) 
  ri_lm = function(y, x,  BLOCK,  PM, C=TRUE, addn=FALSE,
                 control1=rep(0, length(y)), control2=control1,    
                 round=3, onesided=2){

  f = function(x){coef(    lm(y[C]~x[C]+control1[C]+control2[C]+as.factor(BLOCK[C])))[2]}
  PERM = PM[C,]
  bs= apply(PERM,2,f)
 
  if(onesided==1) p = mean(bs<=f(x))  # neg test
  if(onesided==2) p = mean(abs(bs)>=abs(f(x))) # two sided test
  if(onesided==3) p = mean(bs>=f(x)) # pos test

  X=c(round(f(x),round),paste("(",p,")", sep=""))
  if(addn) X = c(X, length(y[C]))
  X}
  
  
###############################################################################################################3
# Function to produce table for different dep vars, treatments, etc


# Function to produce table for different dep vars, treatments, etc
h1tabf 	= function(C, Y = D$MARGINAL, YLAB = "Share Marginalized", dig=2, pctile= .955, data=D){
  ENGB 	= (rank(D$ENGAGED[C])>max(rank(D$ENGAGED[C]))*pctile)   # DEFINE ENG VARIABLE FOR THE SET IN QUESTION
  SMSC  = D$SMS[C]
  msur 	<- systemfit(list(SMSreg = Y[C]~SMSC, ENGreg = Y[C]~ENGB), data=data)
  CM	=coef(summary(msur))
  LH0 	= linearHypothesis(msur, "SMSreg_SMSC + SMSreg_(Intercept) - ENGreg_ENGBTRUE - ENGreg_(Intercept)", test = "Chisq", na.rm=TRUE)
  rbind(paste(YLAB, "among the engaged type=" , 		round(CM[3,1]+CM[4,1], dig)),
        paste(YLAB, "among the SMS access population", 	round(CM[1,1]+CM[2,1], dig)), 
        paste("Difference=", 							round(CM[1,1]+CM[2,1]- CM[3,1]- CM[4,1], dig)), 
        paste("(p)-- two tailed=", 						round(LH0[[4]][[2]],dig)),
        paste("n=", 									round(length(Y[C]), dig))
		)
	}
		
		
####################################################################################

lgraph = function(X,Y,CONDITION, main="", xlab="X", ylab="Y", ylim=NULL, deg=2, ...){
  fit <- locfit(Y[CONDITION]~lp(X[CONDITION], deg=deg, ...)); crit(fit) <- crit(fit,cov=0.95); 
  plot(fit,band="local", main=main, xlab=xlab, ylab=ylab, ylim=ylim)
}

# Linear effect of subsidization (going from highest price to lowest price)
h = function(DEP, X, CONTROL=rep(0, length(DEP))){2*coef(lm(DEP~X+CONTROL))[2]}

# Permutations for linear effect
bsh = function(DEP, PM, CONTROL=rep(0, length(DEP))) { f=function(x) {h(DEP, x, CONTROL)} ; apply(PM,2,f)}

#########################################################################################################
# Randomization inference for heterogenous effects with no controls
#########################################################################################################
# T is treatment and X is a variable that conditions the effect of T on Y
ri.inter.nocontrol = function(Y, X, T, BLOCK, PM, onesided=3){

  # regression model without interaction
  n_inter <- lm(Y ~ T + X + as.factor(BLOCK)) 
  # regression model with interaction
  inter <- lm(Y ~ T*X + X + T + as.factor(BLOCK))
  
  # grab estimated coefficients
  coef   <- inter$coefficients["T:X"]     # we use this for p val
  T.real <- n_inter$coefficients["T"]
  X.real <- n_inter$coefficients["X"]

  # use estimated coefficients from base model to impute potential outcomes
  Y00 <- Y - T.real*T - X.real*X   
  
  sim.outcomes = function(T, cov, block) {
    Y.sim <- Y00
    Y.sim[T==1 & cov==0] <- Y00[T==1 & cov==0] + T.real 
    Y.sim[T==2 & cov==0] <- Y00[T==2 & cov==0] + T.real*2
    Y.sim[T==0 & cov==1] <- Y00[T==0 & cov==1] + X.real
    Y.sim[T==1 & cov==1] <- Y00[T==1 & cov==1] + T.real + X.real
    Y.sim[T==2 & cov==1] <- Y00[T==2 & cov==1] + T.real*2 + X.real
    
    interacted <- lm(Y.sim ~ T*cov + cov + T + as.factor(block))
    return(interacted$coefficients["T:cov"])
  }
  
  
  #return interacted coefficient over all possible simulated random assignments
  sims <- apply(X=PM, 2, FUN=sim.outcomes, cov=X, block=BLOCK)
  
  #p value
  if(onesided==1) p.val = mean(sims<= coef)  # neg test
  if(onesided==2) p.val = mean(abs(sims))>=abs(coef) # two sided test
  if(onesided==3) p.val = mean(sims>= coef) # pos test 
  
  out = rbind(coef, p.val)
  return(out)
}


#########################################################################################################
# Randomization inference for heterogenous effects with controls
#########################################################################################################

##argument for control is a data.frame object
# T is treatment and X is a variable that conditions the effect of T on Y
ri.inter = function(Y, X, T, control, BLOCK, PM, onesided=3){
  
  
    covs.formula <- paste0("~", paste0(colnames(control),collapse="+"), "-1")
    covs <- model.matrix(formula(covs.formula),data=control)
    
    # regression model without interaction
    n_inter <- lm(Y ~ T + X + covs + as.factor(BLOCK)) 
    # regression model with interaction
    inter <- lm(Y ~ T*X + X + T + covs + as.factor(BLOCK))
   
    
    # grab estimated coefficients
    coef  <- inter$coefficients["T:X"]
    T.real <- n_inter$coefficients["T"]
    X.real <- n_inter$coefficients["X"]
  
  
  # use estimated coefficients from base model to impute potential outcomes
  Y00 <- Y - T.real*T - X.real*X   
  
  sim.outcomes = function(T, cov, block, covs) {
    Y.sim <- Y00
    Y.sim[T==1 & cov==0] <- Y00[T==1 & cov==0] + T.real 
    Y.sim[T==2 & cov==0] <- Y00[T==2 & cov==0] + T.real*2
    Y.sim[T==0 & cov==1] <- Y00[T==0 & cov==1] + X.real
    Y.sim[T==1 & cov==1] <- Y00[T==1 & cov==1] + T.real + X.real
    Y.sim[T==2 & cov==1] <- Y00[T==2 & cov==1] + T.real*2 + X.real
    
    interacted <- lm(Y.sim ~ T*cov + cov + T + as.factor(block) + covs)
    return(interacted$coefficients["T:cov"])
    
  }
  
  
  #return interacted coefficient over all possible simulated random assignments
  sims <- apply(X=PM, 2, FUN=sim.outcomes, cov=X, block=BLOCK, covs=covs)
  
  #p value
    
  if(onesided==1) p.val = mean(sims<= coef)  # neg test
  if(onesided==2) p.val = mean(abs(sims))>=abs(coef) # two sided test
  if(onesided==3) p.val = mean(sims>= coef) # pos test
 

  out = rbind(coef, p.val)
  return(out)
}

############################################################################################
## RI for subgroup effects, controls enter as data.frame objects
## Note that subgroup effects are estimated within a single model here to make maximum use of blocks/controls
## Similar results however if subgroup analysis undertaken
## use effect type 1 when dummy var on covariate of interest takes a 1 if true
## use effect type 2 when dummy var on covariate of interest takes a 0 if true (eg. get effects among rich using POOR BI)
# RI for  effect conditional on X (T is treatment)
# Note: If effecttype = 1 marginal effect | X=1 is returned;l otherwise marginal effect given X=0


ri_sg = function(Y, T, X, BLOCKS=BLOCK, round=3, onesided=3, control=NULL, effecttype=1, PERM=PM){
  
  cov= function(control){
    covs.formula <- paste0("~", paste0(colnames(control),collapse="+"), "-1")
    model.matrix(formula(covs.formula),data=control)
  }
  
  if(is.null(control)) covs <- rep(1, length(Y)) else covs <- cov(control)
  
  f = function(T) {
    COEF = coef(lm(Y ~ (T*X) + X + T + covs + as.factor(BLOCKS)))
    COEF["T"] + (effecttype==1)*COEF["T:X"]}
  
  bs= apply(PERM,2,f)                                     #loop over all PM
  if(onesided==1) p = mean(bs<=f(T))  # neg test
  if(onesided==2) p = mean(abs(bs)>=abs(f(T))) # two sided test
  if(onesided==3) p = mean(bs>=f(T)) # pos test
  return(c(round(f(T),round),paste("(",p,")", sep="")))
}
  
##################################################################
# FUNCTION to plot a matrix of betas and standard errors (1 or two b's per cell)
# Used in Appendix
##################################################################
# groups can be 1 or 2 at the moment, giving the number of subgroups contained in  b and se; typically this is 1
# title="Plot", title of graph
# b, se, matrices
# rnames = names for rows of plot
# diag=0, diagonal plot? 
# norm = 1.5, this scales the x axis on each graph to fit the the [-.4,.4] on which data are plotted
# tick = .2, a tickmark on either side of 0 
# rset=1, a right shift parameter (creates space on left)
# uset, an  up shift parameters, creates space on teh bottom ; bottom cuts out lower area
# gap=.15, spacing between groups if there are two groups
# groups=1, if two groups of result exists; add them in the b and se matrices and set groups = 2
# z=1.96, 95% confidence intervals; this can be set at any level
# textsize=1 scale text

mat_plot2 = function(b, se, 
                     groups=1, diag=0, center=0,
                     title="Plot", rnames = 1:nrow(b),rnames_sub = NA, cnames = 1:ncol(b), 
                     rnamestab=0, rset=.5, rset2=0,
                     z=1.96, z2=1.645, p=matrix(NA,nrow(b), ncol(b)),pdigit=3,  
                     norm = 1.5, 
                     vline=TRUE, vlinex = 0, vcol="grey",  #vlinex x location of vertical line, relative to center
                     tick = .2, tick2=.1, # defined as units above or below center point (ie center+-tick)
                     ticktextsize = .5,  uset = 0, bottom=.3,  gap=.15, 
                     ninety=1, textsize=1){
  
  b=b-center
  
  w = ncol(b)/groups
  h = nrow(b)
  plot((1+rset):(w+rset), (1:w)*((h*groups+uset*2)), type = "n", axes=FALSE, xlab="", ylab="", ylim= c(bottom, h+uset*2), xlim= c(0, w+.5+rset), main=title)
  se_ = se*norm
  b_ = b*norm
  
  for(i in 1:w){                                               	# Columns
    if(vline) segments(i+rset+norm*vlinex, .5+uset, i+rset+norm*vlinex, h+uset+.5, col=vcol) # draw vertical lines at vlinex	
    for(j in 1:((i-1)*diag+(1-diag)*h))							# Rows		
    {
      if(is.na(b[j,i])==FALSE){
        segments(max(i+rset+ b_[j,i]-z*se_[j,i], i+rset -.4), j+uset, min(i+ rset+ b_[j,i]+z*se_[j,i],i+ rset+ .4), j+uset)
        if(is.na(p[j,i])==FALSE) {text(i+rset, j+1.25*uset, paste("p=",  formatC(p[j,i], format = "f", digits = pdigit), sep=""), cex=.75*textsize)}
        # Add in 95% ticks
        if(ninety==1){
          if(i+rset+ b_[j,i]-z2*se_[j,i]> i+rset -.4){segments(i+rset+ b_[j,i]-z2*se_[j,i], j+uset- gap/4, i+rset+ b_[j,i]-z2*se_[j,i], j+uset+gap/4)}
          if(i+ rset+ b_[j,i]+z2*se_[j,i]< i+ rset+ .4){segments(i+rset+ b_[j,i]+z2*se_[j,i], j+uset- gap/4, i+rset+ b_[j,i]+z2*se_[j,i], j+uset+gap/4)}
        }						
        if((i+rset+ b_[j,i]>i+rset -.4)&(i+rset+ b_[j,i]<i+rset +.4)){points(i+rset+ b_[j,i], j+uset, pch=19)}				# Points		
        if(groups==2){ 
          segments(max(i+rset+ b_[j,i+w]-z*se_[j,i+w],i+rset -.4), j+uset-gap, min(i+ rset+ b_[j,i+w]+z*se_[j,i+w], i+ rset+ .4), j+uset-gap)
          if((i+rset+ b_[j,i+w]>i+rset -.4)&(i+rset+ b_[j,i+w]<i+rset +.4)){points(i+rset+ b_[j,i+w], j+uset - gap, pch=21, bg="white")}				# Points		
          # Add in 95% ticks							
          if(ninety==1){
            if(i+rset+ b_[j,i+w]-z2*se_[j,i+w]> i+rset -.4){segments(i+rset+ b_[j,i+w]-z2*se_[j,i+w], j+uset- 5*gap/4, i+rset+ b_[j,i+w]-z2*se_[j,i+w], j+uset-3*gap/4)}
            if(i+ rset+ b_[j,i+w]+z2*se_[j,i+w]< i+ rset+ .4){segments(i+rset+ b_[j,i+w]+z2*se_[j,i+w], j+uset- 5*gap/4, i+rset+ b_[j,i+w]+z2*se_[j,i+w], j+uset-3*gap/4)}}
        }}
      segments(i +rset -.4, .5+uset, i+ rset+.4, .5+uset)
      text(i +rset, .35+uset, center, cex=ticktextsize)
      text(i+rset+ tick*norm, .35+uset, tick+center, cex=ticktextsize)  # Major tick
      text(i+rset-tick*norm, .35+uset, -tick+center, cex=ticktextsize)
      segments(i+rset+tick*norm, .5+uset, i+rset+tick*norm, .45+uset)
      segments(i+rset-tick*norm, .5+uset, i+rset-tick*norm, .45+uset)
      text(i+rset+ tick2*norm, .35+uset, tick2+center, cex=ticktextsize)    # Minor  tick
      text(i+rset-tick2*norm, .35+uset, -tick2+center, cex=ticktextsize)
      segments(i+rset+tick2*norm, .5+uset, i+rset+tick2*norm, .45+uset)
      segments(i+rset-tick2*norm, .5+uset, i+rset-tick2*norm, .45+uset)
      segments(i+rset, .5+uset, i+rset, .45+uset)
    }}
  text(0+rnamestab, (1+uset):(h+uset) + gap*(1-groups)/2, rnames, cex = textsize, adj=c(0,.5))
  if(groups==2) {text(rset+rset2, sapply(1:h, function(i) c(i+uset, i+uset-gap)), rnames_sub, cex = textsize, adj=c(0,.5))}
  text((1+rset):(w+rset),h+uset+.75,  cnames[1:w], cex = textsize)
}	



